# -*- coding: utf-8 -*-
"""
Created on Sat Dec  3 16:02:06 2022

@author: Lu_Cool
"""

import pandas as pd
import re
import numpy as np


# Load the EPS table and the similarity-ranking results. The ranking table
# has its 'Unnamed: 0' column dropped and two columns renamed.
df_eps = pd.read_excel(r"E:\Data\wind_resset_csmar_IDs2.xls")
df_sim = (
    pd.read_excel(r"E:\Data\similarity_ranking_rst.xlsx")
    .drop('Unnamed: 0', axis=1)
    .rename(columns={'group_nums': 'clusterID', 'ib': 'IB'})
)

# Match the two tables on the key text extracted from the title.
# str.extract returns the first match's capture group and yields NaN for rows
# where the pattern does not match (or the cell is missing), instead of
# raising IndexError/TypeError as indexing a findall() result with [0] would.
df_eps['key'] = df_eps['x'].str.extract('.*?/(.*?)（.*）', expand=False)
df_sim['key'] = df_sim['path'].str.extract('(.*?)（.*）', expand=False)

# pandas merge treats NaN keys as equal to each other, so EPS rows without a
# key are removed here; otherwise unkeyed rows on both sides would be paired.
eps_columns = ['发布时间_ReleDt', 'x', 'key', '每股收益元股_EPS']
eps_lookup = df_eps.loc[df_eps['key'].notna(), eps_columns]

merge_df = pd.merge(left=df_sim, right=eps_lookup, how='left', on='key')

# Keep only the similarity rows for which an EPS value was found.
rst_df = merge_df[merge_df['每股收益元股_EPS'].notna()]

save_path = r'E:\Data\sim_with_eps.xlsx'
rst_df.to_excel(save_path)
